* Entry point. Positional argument 1 is stored as the project root and is
* used to locate the shared configuration script.
global root_dir = "`1'"

* The directory globals used further down (log_dir, code_dir, dataset_dir,
* final_dir) are not set in this file; presumably config.do defines them.
include "$root_dir/code/config/config.do"

* Open a named log. A failure to open it is reported but does not stop the run.
capture noisily log using ${log_dir}/df_predicted.log, name(dat) replace

* Optional overrides arrive as positional arguments 2-5, in this order:
* weight_category, weight_versions, weight_window, wtype.
* The placeholder ___EMPTY___ marks a slot that was not supplied; such a slot
* leaves the corresponding global untouched.
local label1 "Weight category"
local label2 "Weight versions"
local label3 "Weight window"

local slot = 0
foreach setting in weight_category weight_versions weight_window wtype {
	local ++slot
	* Slot k is read from positional argument k+1.
	local position = `slot' + 1
	global arg`slot' = cond("``position''" == "___EMPTY___", "", "``position''")
	if "${arg`slot'}" == "" {
		continue
	}
	global `setting' "${arg`slot'}"
	* Only the first three settings are echoed with a label.
	if `slot' <= 3 {
		di "`label`slot'': ${`setting'}"
	}
}
* The weight type is always echoed, without a label.
di "${wtype}"

capture noi {

* Run the shared helper scripts quietly, in a fixed order. The code below
* relies on the global countrylist1995 and on the commands labelingvars and
* labelingvarsfinal; presumably these scripts define them (not visible here).
foreach helper_script in ///
	${code_dir}/config/country_list.do ///
	${code_dir}/config/labeling_indepvars_function.do ///
	${code_dir}/config/labeling_finalvars.do {
	quietly do `helper_script'
}

* INDEPENDENT VARIABLES


************************************
* Predicted wages, GDPPC, and VAEMP for manufacturing
***********************************


* Start from the firm-year list and keep only the two key variables.
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* Attach the predicted series (low-skill wages, high-skill wages, VAEMP,
* GDPPC) by year, in that order. unmatched(none) keeps matched rows only.
foreach predicted_file in ///
	${dataset_dir}/indep_vars/lsw_predicted_wide_MANUF.dta ///
	${dataset_dir}/indep_vars/hsw_predicted_wide_MANUF.dta ///
	${dataset_dir}/indep_vars/vaemp_predicted_wide_MANUF.dta ///
	${dataset_dir}/indep_vars/gdppc_predicted_wide_MANUF.dta {
	mmerge year using `predicted_file', unmatched(none)
}

* Firm-level patent weights (original note: based on 10-year patent portfolio
* pre-1980, etc.). Firms without a match stay in the data and are flagged.
mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
gen missing_weights_1995 = (_m == 1)
drop _m

* Bring in GDP shares, used as fallback when patent weights are missing.
* cross pairs every row in memory with every row of the shares file
* (presumably that file holds a single row - TODO confirm).
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
sort BvD year

* Set patent weights with GDP shares as backup and compute weighted values.
*
* The per-country weight and the largest weight per row do not depend on the
* series being processed, so they are built once here rather than being
* regenerated and dropped on every pass of the series loop.
foreach ctry of global countrylist1995 {
	gen weight_`ctry' = share2_all_1995_`ctry'
	replace weight_`ctry' = share_GDP_`ctry' if weight_`ctry' == .
}
* The country holding the largest weight is treated as the home country.
egen maxshare = rowmax(weight_??)

foreach vv in lswPs hswPs vaempPs gdppcPs {
	* Weighted level and weighted log, country by country.
	foreach ctry of global countrylist1995 {
		confirm variable `vv'_`ctry'
		gen `vv'_wtd_`ctry' = weight_`ctry'* `vv'_`ctry'
		gen ln`vv'_wtd_`ctry' = weight_`ctry'* ln(`vv'_`ctry')
	}
	* Totals across countries; a row with no non-missing component stays missing.
	egen `vv'_ALL_1995_wtd = rowtotal(`vv'_wtd_*), missing
	egen ln`vv'_ALL_1995_wtd = rowtotal(ln`vv'_wtd_*), missing

	* Home-country component, in levels and in logs. If several countries tie
	* for the largest weight, the one listed last in countrylist1995 wins.
	gen ln`vv'_shr_home_1995_wtd = .
	gen `vv'_shr_home_1995_wtd = .
	foreach ctry of global countrylist1995 {
		replace ln`vv'_shr_home_1995_wtd = ln`vv'_wtd_`ctry' if maxshare==weight_`ctry'
		replace `vv'_shr_home_1995_wtd = `vv'_wtd_`ctry' if maxshare==weight_`ctry'
	}

	* Foreign component is the total minus the home component. It is set to
	* missing when the home country carries the whole weight.
	gen ln`vv'_shr_foreign_1995_wtd = ln`vv'_ALL_1995_wtd - ln`vv'_shr_home_1995_wtd
	gen `vv'_shr_foreign_1995_wtd = `vv'_ALL_1995_wtd - `vv'_shr_home_1995_wtd
	replace ln`vv'_shr_foreign_1995_wtd = . if maxshare == 1
	replace `vv'_shr_foreign_1995_wtd = . if maxshare == 1

	* Version 2: home-country value and average foreign value, i.e. each
	* component divided by its total weight. Normalize only for the home
	* country, not the HQ country.
	gen ln`vv'_shr2_home_1995_wtd = ln`vv'_shr_home_1995_wtd / maxshare
	gen ln`vv'_shr2_foreign_1995_wtd = ln`vv'_shr_foreign_1995_wtd / (1-maxshare)
	gen `vv'_shr2_home_1995_wtd = `vv'_shr_home_1995_wtd / maxshare
	gen `vv'_shr2_foreign_1995_wtd = `vv'_shr_foreign_1995_wtd / (1-maxshare)

	* Remove this series' per-country weighted components.
	drop *`vv'_wtd_*
}
* Shared helpers and raw inputs are no longer needed.
drop maxshare weight_??
drop share*
drop lsw*_?? hsw*_?? vaemp*_?? gdppc*_??


* Label the variables of each series with the labelingvars command (not
* defined in this file), then store the firm-year predicted-variables file.
foreach series in lsw hsw vaemp gdppc {
	labelingvars "`series'"
}

compress
save ${dataset_dir}/indep_vars/bvd_year_predicted_variables_manuf_sharesgdpweighted${weight_window}_${wtype}.dta, replace



********************************************************
********************** FINAL DATASET********************
********************************************************

* Dependent variables first, restricted to the firm-years on the list.
use ${dataset_dir}/dep_vars/bvd_year_depvars.dta, clear
mmerge BvD year using ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, unmatched(none)

* Firm-year covariates, ending with the predicted set saved earlier in this
* script. Rows without a match are kept.
foreach firm_year_file in ///
	${dataset_dir}/indep_vars/bvd_year_country_multinational${weight_window}_${wtype}.dta ///
	${dataset_dir}/indep_vars/bvd_year_devgdp_sharesgdpweighted${weight_window}_${wtype}.dta ///
	${dataset_dir}/indep_vars/bvd_year_predicted_variables_manuf_sharesgdpweighted${weight_window}_${wtype}.dta {
	mmerge BvD year using `firm_year_file', unmatched(master)
}

* Industry by firm; firms absent from the industry file are labelled "Other".
mmerge BvD using ${final_dir}/BvD_industry.dta, unmatched(master)
replace industry = "Other" if _m == 1
drop _m

* Make the missing-weights flag constant within firm, so it is also filled in
* for years that had no match in the predicted set.
bysort BvD : egen mw = max(missing_weights_1995)
replace missing_weights_1995 = mw
drop mw


* Logarithms and averages.
* For each predicted series two totals end up with the _1995_a suffix:
*   <series>Ps_1995_a    is the log of the weighted sum of levels
*   ln<series>Ps_1995_a  is the weighted sum of logs, renamed from the _wtd total
foreach series in lsw hsw gdppc vaemp {
	gen `series'Ps_1995_a = ln(`series'Ps_ALL_1995_wtd )
	rename ln`series'Ps_ALL_1995_wtd ln`series'Ps_1995_a
}

* The GDP-gap total is only renamed, not transformed.
rename lngdpgap_ALL_1995_wtd lngdpgap_1995_a

* Home and foreign components: logged for the predicted series, renamed
* as-is for the GDP gap.
local share_kinds shr_home shr_foreign shr2_home shr2_foreign
foreach series in lsw hsw vaemp gdppc {
	foreach kind of local share_kinds {
		gen `series'Ps_`kind'_1995_a = ln(`series'Ps_`kind'_1995_wtd)
	}
}
foreach kind of local share_kinds {
	rename lngdpgap_`kind'_1995_wtd lngdpgap_`kind'_1995_a
}


* Normalizing (see DHOZ p26 subnote 34).
* For every variable ending in _shr_home_1995_a except the lngdpgap one, build
* shr4 home and foreign versions: the shr2 log value multiplied by a factor
* that is held fixed at its 1995 value within each group.
qui ds *_shr_home_1995_a
foreach vaar in `r(varlist)' {
	* Series prefix is everything before the first underscore of the name.
	local vv = substr("`vaar'",1,strpos("`vaar'","_")-1)
	* The GDP-gap series is not normalized.
	if "`vv'" == "lngdpgap" {
		continue
	}
	* Time-varying factor: ratio of exp(shr2 component) to exp(total), times
	* the home weight (home) or one minus the home weight (foreign).
	* maxweight_1995 is not created in this file; presumably it comes from one
	* of the merged datasets - TODO confirm.
	gen term_home_var = exp(`vv'_shr2_home_1995_a)/exp(`vv'_1995_a)*maxweight_1995
	gen term_foreign_var = exp(`vv'_shr2_foreign_1995_a)/exp(`vv'_1995_a)*(1-maxweight_1995)

	* Keep only the 1995 value of each factor and spread it over all years
	* of the group.
	gen _term_home = term_home_var if year==1995
	gen _term_foreign = term_foreign_var if year==1995
	* NOTE(review): grouping is by lse_id, while the rest of the script keys
	* firms on BvD. Confirm lse_id exists here and identifies the same firms.
	bys lse_id : egen term_home_fixed = max(_term_home)
	bys lse_id : egen term_foreign_fixed = max(_term_foreign)
	gen `vv'_shr4_home_1995_a = `vv'_shr2_home_1995_a*term_home_fixed
	gen `vv'_shr4_foreign_1995_a = `vv'_shr2_foreign_1995_a*term_foreign_fixed
	* Remove the helper variables before the next series.
	drop term_* _term_*
}


* Remove every remaining variable that ends in _wtd.
drop *_wtd

* Merge in the spillovers for our dependent variable here (table A15 has only auto95).
mmerge BvD year using ${dataset_dir}/spillovers/bvd_year_spillovers_auto95_bia.dta, unmatched(master)

drop _m
* Variables for the 1982 and 1980 variants are dropped if present.
cap drop *1982*
cap drop *1980*

* Drop the variables of the other outcome variants. A stem wrapped in
* wildcards on both sides also covers names that simply start with the stem,
* so each stem is listed once. Listing the prefix-only form as well could not
* remove anything more and would only add a way for this line to abort with
* a variable-not-found error.
drop *CNC* *robo* *pauto* *autonol* *autoX* *tfa_bia* *any* *auto90* *auto80* *autm*

* Estimation sample: weights available, 1991 onwards, and a home-country
* weight strictly below one.
keep if missing_weights_1995 == 0 & year >= 1991 & maxweight_1995 < 1

* Label the final variables of every series, including the GDP gap, with the
* labelingvarsfinal command (not defined in this file), then store the
* regression dataset.
foreach series in lsw hsw vaemp gdppc lngdpgap {
	labelingvarsfinal "`series'"
}

compress
save ${final_dir}/regression_dataset${weight_window}_${wtype}_predicted.dta, replace

}
* _rc still holds the return code of the capture block that ends just above.
if _rc != 0 {
	display "Execution finished with errors."
}
else {
	display "Execution finished successfully."
}

* Close the named log; ignore the error if it is not open.
capture log close dat